To interrogate the different maturation states required in these two differentiation pathways we performed a single-cell transcriptional and chromatin accessibility profiling of the PCs together with a representative subset of Germinal Center B Cells (GCBC) and MBCs. A total of 12,779 cells were analyzed at scRNA-seq level which clustered into 23 subpopulations, while 4,160 cells were grouped into 13 subpopulations at the scATAC-seq level to re-force cell transfer annotation and interpretation.
library(Seurat)
library(Signac)
library(GenomicRanges)
library(pheatmap)
library(JASPAR2020)
library(TFBSTools)
library(dplyr)
library(ggplot2)
library(tidyverse)
library(data.table)
library(ggpubr)
library(writexl)
library(plyr)
library(stringr)
library(grid)
library(gridExtra)
library(EnhancedVolcano)
library(qlcMatrix)
set.seed(333)
cell_type = "PC"
# Paths
path_to_obj <- paste0(
here::here("scATAC-seq/results/R_objects/level_5/"),
cell_type,
"/05.",
cell_type,
"_chromVar_CISBP_level_5.rds",
sep = ""
)
path_to_obj_RNA <- paste0(
here::here("scRNA-seq/3-clustering/5-level_5/"),
cell_type,
"/PC_seu_obj_level_5_eta.rds")
color_palette <- c("#73787E", "#B8BCC1","#FECFC7" ,
"#FF8E7B","#a13b53","#A6E1F4",
"#586BA4", "#323734","#035F72",
"#9CC6CF" ,"#198C19","#006600","#FFD8B1")
cels_order <- c("Dark Zone GCBC",
"Light Zone GCBC",
"PC committed Light Zone GCBC",
"Early PC precursor",
"PB",
"IgG+ PC precursor",
"IgM+ PC precursor",
"IgD PC precursor",
"preMature IgG+ PC",
"preMature IgM+ PC",
"Mature PC",
"MBC derived PC precursor",
"class switch MBC")
colfunc <- colorRampPalette(c("#4575B4", "white","darkgreen"))
DARS <- function(ident.1,ident.2){
DARs <- FindMarkers(
ident.1 = ident.1,
ident.2 = ident.2,
object = seurat,
min.pct = 0.05,
test.use = 'LR',
latent.vars = 'nCount_peaks_level_5')
return(DARs)}
DARS_ChromVar <- function(ident.1,ident.2){
DARs <- FindMarkers(
ident.1 = ident.1,
ident.2 = ident.2,
object = seurat,
min.pct = 0.05,
test.use = 'LR')
return(DARs)}
DARS_matrix <- function(DARs, group){
avgexpr_mat <- AverageExpression(
features = row.names(DARs),
seurat,
assays = "peaks_level_5",
return.seurat = F,
group.by = group,
slot = "data")
return(avgexpr_mat)}
DARS_filtered_max <- function(mat, quant, cluster_interest){
matrix_coefficient <- rowMax(mat) / rowSums(mat)
hist(matrix_coefficient, breaks = 100)
abline(v = quantile(matrix_coefficient, quant),
col="red", lwd=3, lty=2)
regions <- which(matrix_coefficient > quantile(matrix_coefficient,
quant))
filtered_regions <- mat[regions,]
filtered_regions_interest_clusters <- filtered_regions[which(apply(X = filtered_regions,
MARGIN = 1,FUN = which.max) %in% cluster_interest),]
return(filtered_regions_interest_clusters)}
pheatmap_plot <- function(matrices,n_cutree_rows,
annotation_col,mycolors_list){
pheatmap(matrices,
scale = "row",
annotation_col = annotation_col,
annotation_colors = mycolors_list,
color = colfunc(100),
angle_col = 45,
show_rownames=F,
border_color = "white",
cluster_rows = T,
cluster_cols = F,
fontsize_col = 10,
cutree_rows = n_cutree_rows,
clustering_distance_cols = "euclidean",
clustering_method = "ward.D2")}
seurat_RNA <- readRDS(path_to_obj_RNA)
Idents(seurat_RNA) <- "names_level_5_clusters_eta"
seurat_RNA
## An object of class Seurat
## 37378 features across 12779 samples within 1 assay
## Active assay: RNA (37378 features, 0 variable features)
## 3 dimensional reductions calculated: pca, harmony, umap
seurat <- readRDS(path_to_obj)
seurat_peaks <- seurat@assays$peaks_level_5@ranges
seurat
## An object of class Seurat
## 107847 features across 4160 samples within 2 assays
## Active assay: peaks_level_5 (106083 features, 104344 variable features)
## 1 other assay present: chromvar
## 3 dimensional reductions calculated: umap, lsi, harmony
#pdf(file="/Users/pauli/Desktop/PAPER_FIGURES/ETA/2.scATAC/1.UMAP_scATAC.pdf",
# width=5,height=5,paper='special')
DimPlot(seurat,
reduction = "umap",
pt.size = 1,
cols= color_palette,
label = FALSE) + NoLegend() + theme(plot.title = element_blank())
A pairwise differential accessibility analysis between the major biological identities revealed an extensive chromatin modulation along the primary reaction pathway.
DefaultAssay(seurat) <- "peaks_level_5"
step1 <- DARS(ident.1 = "Light Zone GCBC",
ident.2 = "PC committed Light Zone GCBC")
step2 <- DARS(ident.1 = "PC committed Light Zone GCBC",
ident.2 = "IgG+ PC precursor")
step3 <- DARS(ident.1 = "IgG+ PC precursor",
ident.2 = "Mature PC")
step4 <- DARS(ident.1 = "class switch MBC",
ident.2 = "Mature PC")
all_DARs <- rbind(step1,step2,step3, step4)
EnhancedVolcano(all_DARs,
lab = NA,
pCutoff = 10e-03,
FCcutoff = 0.5,
labSize = 4,
legendLabSize = 8,
x = 'avg_log2FC', y = 'p_val_adj')
The following statistical cutoffs where applied to filter the DARs: avg_log2FC >= 0,5 and p adjusted value <= 10e-03.
step1_filtered <- step1[which(abs(step1$avg_log2FC) >= 0.5 &
step1$p_val_adj <= 10e-03),]
step2_filtered <- step2[which(abs(step2$avg_log2FC) >= 0.5 &
step2$p_val_adj <= 10e-03),]
step3_filtered <- step3[which(abs(step3$avg_log2FC) >= 0.5 &
step3$p_val_adj <= 10e-03),]
step4_filtered <- step4[which(abs(step4$avg_log2FC) >= 0.5 &
step4$p_val_adj <= 10e-03),]
nrow(step1_filtered)
## [1] 228
nrow(step2_filtered)
## [1] 5774
nrow(step3_filtered)
## [1] 446
nrow(step4_filtered)
## [1] 2892
DARS <- rev(c(nrow(step1_filtered),nrow(step2_filtered),
nrow(step3_filtered),nrow(step4_filtered)))
steps <- rev(c("step1", "step2", "step3", "step4"))
df <- data.frame(steps, round(DARS/sum(DARS) * 100,2))
colnames(df) <- c("steps","DARS")
#ggsave("/Users/pauli/Desktop/PAPER_FIGURES/ETA/2.scATAC/DARs_barplot.pdf")
ggbarplot(df, "steps", "DARS",
orientation = "horiz",
label = TRUE) + scale_y_continuous(limits=c(0, 100))
To evaluate the general landscape of the chromatin remodeling during PC differentiation, we combined the obtained DARs into a unique heatmap. A hierarchical clustering based on the Euclidean distance measure and ward.D2 method revealed three main modules of chromatin dynamics.
DefaultAssay(seurat) <- "peaks_level_5"
step1_mat <- DARS_matrix(DARs = step1_filtered,
group = "annotation_level_5")
step2_mat <- DARS_matrix(DARs = step2_filtered,
group = "annotation_level_5")
step3_mat <- DARS_matrix(DARs = step3_filtered,
group = "annotation_level_5")
step4_mat <- DARS_matrix(DARs = step4_filtered,
group = "annotation_level_5")
path_matrix <- rbind(step1_mat$peaks_level_5,
step2_mat$peaks_level_5,
step3_mat$peaks_level_5,
step4_mat$peaks_level_5)
cell_types <- names(table(seurat$annotation_level_5))
annotation_col = data.frame(
cell_type = cell_types)
rownames(annotation_col) <- cell_types
mycolors <- color_palette
names(mycolors) <- cell_types
mycolors_list <- list(mycolors)
names(mycolors_list) <- "cell_type"
#pdf(file="/Users/pauli/Desktop/PAPER_FIGURES/ETA/2.scATAC/3.Heatmap_DARs.pdf",
# width = 8,height = 6)
entire_heatmap <- pheatmap_plot(path_matrix[,cels_order],n_cutree_rows = 3,
annotation_col,
mycolors_list)
entire_heatmap
cutree_out = cutree(entire_heatmap$tree_row, k=3)
cutree_df <- as.data.frame(cbind(names(cutree_out),cutree_out))
cluster1 <- cutree_df[cutree_df$cutree_out == 1,]
cluster2 <- cutree_df[cutree_df$cutree_out == 2,]
cluster3 <- cutree_df[cutree_df$cutree_out == 3,]
As module-specific DARs likely contain specific DNA motifs that allow binding of specific TF, we performed an enrichment analysis and identified several key binding motifs of expressed TF related to the cell identity.
Motif_enr_cluster1 <- FindMotifs(object = seurat,
features = cluster1$V1)
Motif_enr_cluster1$cluster <- 1
avgexpr_mat <- AverageExpression(
features = Motif_enr_cluster1$motif.name,
seurat_RNA,
return.seurat = F,
group.by = "annotation_level_1",
slot = "data")
# Removing motifs with non-expression in any main population
avgexpr_df_filt <- filter(as.data.frame(avgexpr_mat$RNA),
GCBC > 0, NBC_MBC > 0, PC > 0)
DT::datatable(avgexpr_df_filt)
DT::datatable(Motif_enr_cluster1)
FeaturePlot(
object = seurat,
features = c("ENSG00000164330-LINE255-EBF1-D-N1",
"ENSG00000196092-LINE3170-PAX5-D-N8"),
min.cutoff = 'q1',
max.cutoff = 'q99',
pt.size = 0.1,
ncol = 2
)
clustering1_filtered <- DARS_filtered_max(path_matrix[cluster1$V1,], 0.6, 3)
#pdf(file="/Users/pauli/Desktop/PAPER_FIGURES/ETA/2.scATAC/4.Heatmap_PC_committed_DARs.pdf")
pheatmap_plot(clustering1_filtered[,cels_order],n_cutree_rows = 1, annotation_col, mycolors_list)
seurat <- AddModuleScore(
seurat,
name = 'PC-committed-module',
features = list(row.names(clustering1_filtered)))
colfunc <- colorRampPalette(c("#4575B4", "white","darkgreen"))
#pdf(file="/Users/pauli/Desktop/PAPER_FIGURES/ETA/2.scATAC/5.Module_PC-committed_DARs.pdf",
# width=5,height=5,paper='special')
FeaturePlot(seurat,
order = TRUE,
min.cutoff = 'q1',
max.cutoff = 'q99',
pt.size = 0.8,
features = "PC.committed.module1") +
theme(plot.title = element_blank()) +
scale_color_gradientn( colours = c('lightgrey', 'darkgreen'))
clustering1_filtered_motifs <- FindMotifs(object = seurat,
features = row.names(clustering1_filtered))
DT::datatable(clustering1_filtered_motifs)
#pdf(file="/Users/pauli/Desktop/PAPER_FIGURES/ETA/2.scATAC/6.Volcano_plot_POU.pdf",
# width=5,height=8,paper='special')
EnhancedVolcano(clustering1_filtered_motifs,
pCutoff = 10e-03,
FCcutoff = 0.5,
labSize = 4,
legendLabSize = 8,
lab = clustering1_filtered_motifs$motif.name,
x = 'fold.enrichment', y = 'pvalue')
#pdf(file="/Users/pauli/Desktop/PAPER_FIGURES/ETA/2.scATAC/7.UMAP_POU.pdf",
# width=5,height=5,paper='special')
FeaturePlot(seurat,
order = TRUE,
features = "ENSG00000137709-LINE2649-POU2F3-D-N1",
min.cutoff = 'q1',
max.cutoff = 'q99',
pt.size = 0.8) +
theme(plot.title = element_blank()) + scale_color_gradientn( colours = c('lightgrey', 'chartreuse3'))
Motif_enr_cluster2 <- FindMotifs(object = seurat,
features = cluster2$V1)
avgexpr_mat <- AverageExpression(
features = Motif_enr_cluster2$motif.name,
seurat_RNA,
return.seurat = F,
group.by = "annotation_level_1",
slot = "data")
# Removing motifs with non-expression in any main population
avgexpr_df_filt <- filter(as.data.frame(avgexpr_mat$RNA),
GCBC > 0, NBC_MBC > 0, PC > 0)
DT::datatable(avgexpr_df_filt)
DT::datatable(Motif_enr_cluster2)
FeaturePlot(
object = seurat,
features = c("ENSG00000137265-LINE2748-IRF4-D-N1",
"ENSG00000140968-LINE2752-IRF8-D-N2"),
min.cutoff = 'q1',
max.cutoff = 'q99',
pt.size = 0.1,
ncol = 2
)
Motif_enr_cluster3 <- FindMotifs(object = seurat,
features = cluster3$V1)
avgexpr_mat <- AverageExpression(
features = Motif_enr_cluster3$motif.name,
seurat_RNA,
return.seurat = F,
group.by = "annotation_level_1",
slot = "data")
# Removing motifs with non-expression in any main population
avgexpr_df_filt <- filter(as.data.frame(avgexpr_mat$RNA),
GCBC > 0, NBC_MBC > 0, PC > 0)
DT::datatable(avgexpr_df_filt)
DT::datatable(Motif_enr_cluster3)
FeaturePlot(
object = seurat,
features = c("ENSG00000142539-LINE1880-SPIB-D-N2",
"ENSG00000175832-LINE1927-ETV4-D-N1"),
min.cutoff = 'q1',
max.cutoff = 'q99',
pt.size = 0.1,
ncol = 2
)
DefaultAssay(seurat) <- "chromvar"
step1_chromVar <- DARS_ChromVar(ident.1 = "Light Zone GCBC",
ident.2 = "PC committed Light Zone GCBC")
step2_chromVar <- DARS_ChromVar(ident.1 = "PC committed Light Zone GCBC",
ident.2 = "IgG+ PC precursor")
step3_chromVar <- DARS_ChromVar(ident.1 = "IgG+ PC precursor",
ident.2 = "Mature PC")
step4_chromVar <- DARS_ChromVar(ident.1 = "class switch MBC",
ident.2 = "Mature PC")
step1_chromVar_filtered <- step1_chromVar[which(abs(step1_chromVar$avg_log2FC) >= 0.5 &
step1_chromVar$p_val_adj <= 10e-05),]
step2_chromVar_filtered <- step2_chromVar[which(abs(step2_chromVar$avg_log2FC) >= 0.5 &
step2_chromVar$p_val_adj <= 10e-05),]
step3_chromVar_filtered <- step2_chromVar[which(abs(step3_chromVar$avg_log2FC) >= 0.5 &
step3_chromVar$p_val_adj <= 10e-05),]
step4_chromVar_filtered <- step4_chromVar[which(abs(step4_chromVar$avg_log2FC) >= 0.5 &
step4_chromVar$p_val_adj <= 10e-05),]
DT::datatable(step1_chromVar_filtered)
DT::datatable(step2_chromVar_filtered)
DT::datatable(step3_chromVar_filtered)
DT::datatable(step4_chromVar_filtered)
sessionInfo()
## R version 4.0.3 (2020-10-10)
## Platform: x86_64-apple-darwin13.4.0 (64-bit)
## Running under: macOS Big Sur 10.16
##
## Matrix products: default
## BLAS/LAPACK: /Users/pauli/opt/anaconda3/envs/Motif_TF/lib/libopenblasp-r0.3.10.dylib
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## attached base packages:
## [1] grid parallel stats4 stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] qlcMatrix_0.9.7 sparsesvd_0.2 slam_0.1-47 Matrix_1.3-4 EnhancedVolcano_1.13.2 ggrepel_0.9.1 gridExtra_2.3 plyr_1.8.6 writexl_1.4.0 ggpubr_0.4.0 data.table_1.14.0 forcats_0.5.0 stringr_1.4.0 purrr_0.3.4 readr_1.4.0 tidyr_1.1.3 tibble_3.1.2 tidyverse_1.3.0 ggplot2_3.3.5 dplyr_1.0.7 TFBSTools_1.26.0 JASPAR2020_0.99.10 pheatmap_1.0.12 GenomicRanges_1.40.0 GenomeInfoDb_1.24.2 IRanges_2.22.1 S4Vectors_0.26.0 BiocGenerics_0.34.0 Signac_1.2.1 SeuratObject_4.0.2 Seurat_4.0.3 BiocStyle_2.16.1
##
## loaded via a namespace (and not attached):
## [1] utf8_1.2.1 reticulate_1.20 R.utils_2.10.1 tidyselect_1.1.1 poweRlaw_0.70.6 RSQLite_2.2.1 AnnotationDbi_1.50.3 htmlwidgets_1.5.3 docopt_0.7.1 BiocParallel_1.22.0 Rtsne_0.15 munsell_0.5.0 codetools_0.2-17 ica_1.0-2 DT_0.16 future_1.21.0 miniUI_0.1.1.1 withr_2.4.2 colorspace_2.0-2 Biobase_2.48.0 knitr_1.30 rstudioapi_0.11 ROCR_1.0-11 ggsignif_0.6.0 tensor_1.5 listenv_0.8.0 labeling_0.4.2 GenomeInfoDbData_1.2.3 polyclip_1.10-0 bit64_4.0.5 farver_2.1.0 rprojroot_2.0.2 parallelly_1.26.1 vctrs_0.3.8 generics_0.1.0 xfun_0.18 lsa_0.73.2 ggseqlogo_0.1 R6_2.5.0 bitops_1.0-7 spatstat.utils_2.2-0 DelayedArray_0.14.0
## [43] assertthat_0.2.1 promises_1.2.0.1 scales_1.1.1 gtable_0.3.0 globals_0.14.0 goftest_1.2-2 seqLogo_1.54.3 rlang_0.4.11 RcppRoll_0.3.0 splines_4.0.3 rstatix_0.6.0 rtracklayer_1.48.0 lazyeval_0.2.2 broom_0.7.2 spatstat.geom_2.2-0 modelr_0.1.8 BiocManager_1.30.10 yaml_2.2.1 reshape2_1.4.4 abind_1.4-5 crosstalk_1.1.1 backports_1.1.10 httpuv_1.6.1 tools_4.0.3 bookdown_0.21 ellipsis_0.3.2 spatstat.core_2.2-0 RColorBrewer_1.1-2 ggridges_0.5.3 Rcpp_1.0.6 zlibbioc_1.34.0 RCurl_1.98-1.2 rpart_4.1-15 deldir_0.2-10 pbapply_1.4-3 cowplot_1.1.1 zoo_1.8-9 haven_2.3.1 SummarizedExperiment_1.18.1 cluster_2.1.0 here_1.0.1 fs_1.5.0
## [85] magrittr_2.0.1 scattermore_0.7 openxlsx_4.2.4 reprex_0.3.0 lmtest_0.9-38 RANN_2.6.1 SnowballC_0.7.0 fitdistrplus_1.1-5 matrixStats_0.59.0 hms_0.5.3 patchwork_1.1.1 mime_0.11 evaluate_0.14 xtable_1.8-4 XML_3.99-0.3 rio_0.5.16 readxl_1.3.1 compiler_4.0.3 KernSmooth_2.23-17 crayon_1.4.1 R.oo_1.24.0 htmltools_0.5.1.1 mgcv_1.8-33 later_1.2.0 lubridate_1.7.9 DBI_1.1.0 tweenr_1.0.1 dbplyr_1.4.4 MASS_7.3-53 car_3.0-10 cli_3.0.0 R.methodsS3_1.8.1 igraph_1.2.6 pkgconfig_2.0.3 GenomicAlignments_1.24.0 TFMPvalue_0.0.8 foreign_0.8-80 plotly_4.9.4.1 spatstat.sparse_2.0-0 xml2_1.3.2 annotate_1.66.0 DirichletMultinomial_1.30.0
## [127] XVector_0.28.0 rvest_0.3.6 digest_0.6.27 sctransform_0.3.2 RcppAnnoy_0.0.18 pracma_2.2.9 CNEr_1.24.0 spatstat.data_2.1-0 Biostrings_2.56.0 cellranger_1.1.0 rmarkdown_2.5 leiden_0.3.8 fastmatch_1.1-0 uwot_0.1.10 curl_4.3.2 shiny_1.6.0 Rsamtools_2.4.0 gtools_3.9.2 lifecycle_1.0.0 nlme_3.1-150 jsonlite_1.7.2 carData_3.0-4 viridisLite_0.4.0 BSgenome_1.56.0 fansi_0.5.0 pillar_1.6.1 lattice_0.20-41 KEGGREST_1.28.0 fastmap_1.1.0 httr_1.4.2 survival_3.2-7 GO.db_3.11.4 glue_1.4.2 zip_2.1.1 png_0.1-7 bit_4.0.4 ggforce_0.3.2 stringi_1.6.2 blob_1.2.1 caTools_1.18.2 memoise_1.1.0 irlba_2.3.3
## [169] future.apply_1.7.0